getwd()
## [1] "/Users/preetvilu/Downloads"
#setwd("/Users/preetvilu/Downloads")
# Read the life-expectancy CSV export into the data frame `data1`.
data1 <- read.csv(
  file = "/Users/preetvilu/Downloads/Life Expectancy Data.csv"
)
It is a listing of life expectancy observations collected for 193 countries over the years 2000 to 2015, published by the World Health Organization (WHO) and the United Nations (UN). Categories of factors that can affect life expectancy:
* Social factors: Status, GDP, Income composition of resources, Schooling
* Mortality: Adult mortality, Infant deaths and Number of under-five deaths
* Healthcare-related: Immunization coverage (Polio, Hepatitis B, Measles and Diphtheria), Alcohol consumption, HIV/AIDS recorded cases, BMI and prevalence of thinness among children and adolescents.
# Initial observations and hypothesis
dim(data1)
## [1] 2938 22
nrow(data1)
## [1] 2938
ncol(data1)
## [1] 22
head(data1)
## Country Year Status Life.expectancy Adult.Mortality infant.deaths
## 1 Afghanistan 2015 Developing 65.0 263 62
## 2 Afghanistan 2014 Developing 59.9 271 64
## 3 Afghanistan 2013 Developing 59.9 268 66
## 4 Afghanistan 2012 Developing 59.5 272 69
## 5 Afghanistan 2011 Developing 59.2 275 71
## 6 Afghanistan 2010 Developing 58.8 279 74
## Alcohol percentage.expenditure Hepatitis.B Measles BMI under.five.deaths
## 1 0.01 71.279624 65 1154 19.1 83
## 2 0.01 73.523582 62 492 18.6 86
## 3 0.01 73.219243 64 430 18.1 89
## 4 0.01 78.184215 67 2787 17.6 93
## 5 0.01 7.097109 68 3013 17.2 97
## 6 0.01 79.679367 66 1989 16.7 102
## Polio Total.expenditure Diphtheria HIV.AIDS GDP Population
## 1 6 8.16 65 0.1 584.25921 33736494
## 2 58 8.18 62 0.1 612.69651 327582
## 3 62 8.13 64 0.1 631.74498 31731688
## 4 67 8.52 67 0.1 669.95900 3696958
## 5 68 7.87 68 0.1 63.53723 2978599
## 6 66 9.20 66 0.1 553.32894 2883167
## thinness..1.19.years thinness.5.9.years Income.composition.of.resources
## 1 17.2 17.3 0.479
## 2 17.5 17.5 0.476
## 3 17.7 17.7 0.470
## 4 17.9 18.0 0.463
## 5 18.2 18.2 0.454
## 6 18.4 18.4 0.448
## Schooling
## 1 10.1
## 2 10.0
## 3 9.9
## 4 9.8
## 5 9.5
## 6 9.2
summary(data1)
## Country Year Status Life.expectancy
## Length:2938 Min. :2000 Length:2938 Min. :36.30
## Class :character 1st Qu.:2004 Class :character 1st Qu.:63.10
## Mode :character Median :2008 Mode :character Median :72.10
## Mean :2008 Mean :69.22
## 3rd Qu.:2012 3rd Qu.:75.70
## Max. :2015 Max. :89.00
## NA's :10
## Adult.Mortality infant.deaths Alcohol percentage.expenditure
## Min. : 1.0 Min. : 0.0 Min. : 0.0100 Min. : 0.000
## 1st Qu.: 74.0 1st Qu.: 0.0 1st Qu.: 0.8775 1st Qu.: 4.685
## Median :144.0 Median : 3.0 Median : 3.7550 Median : 64.913
## Mean :164.8 Mean : 30.3 Mean : 4.6029 Mean : 738.251
## 3rd Qu.:228.0 3rd Qu.: 22.0 3rd Qu.: 7.7025 3rd Qu.: 441.534
## Max. :723.0 Max. :1800.0 Max. :17.8700 Max. :19479.912
## NA's :10 NA's :194
## Hepatitis.B Measles BMI under.five.deaths
## Min. : 1.00 Min. : 0.0 Min. : 1.00 Min. : 0.00
## 1st Qu.:77.00 1st Qu.: 0.0 1st Qu.:19.30 1st Qu.: 0.00
## Median :92.00 Median : 17.0 Median :43.50 Median : 4.00
## Mean :80.94 Mean : 2419.6 Mean :38.32 Mean : 42.04
## 3rd Qu.:97.00 3rd Qu.: 360.2 3rd Qu.:56.20 3rd Qu.: 28.00
## Max. :99.00 Max. :212183.0 Max. :87.30 Max. :2500.00
## NA's :553 NA's :34
## Polio Total.expenditure Diphtheria HIV.AIDS
## Min. : 3.00 Min. : 0.370 Min. : 2.00 Min. : 0.100
## 1st Qu.:78.00 1st Qu.: 4.260 1st Qu.:78.00 1st Qu.: 0.100
## Median :93.00 Median : 5.755 Median :93.00 Median : 0.100
## Mean :82.55 Mean : 5.938 Mean :82.32 Mean : 1.742
## 3rd Qu.:97.00 3rd Qu.: 7.492 3rd Qu.:97.00 3rd Qu.: 0.800
## Max. :99.00 Max. :17.600 Max. :99.00 Max. :50.600
## NA's :19 NA's :226 NA's :19
## GDP Population thinness..1.19.years
## Min. : 1.68 Min. :3.400e+01 Min. : 0.10
## 1st Qu.: 463.94 1st Qu.:1.958e+05 1st Qu.: 1.60
## Median : 1766.95 Median :1.387e+06 Median : 3.30
## Mean : 7483.16 Mean :1.275e+07 Mean : 4.84
## 3rd Qu.: 5910.81 3rd Qu.:7.420e+06 3rd Qu.: 7.20
## Max. :119172.74 Max. :1.294e+09 Max. :27.70
## NA's :448 NA's :652 NA's :34
## thinness.5.9.years Income.composition.of.resources Schooling
## Min. : 0.10 Min. :0.0000 Min. : 0.00
## 1st Qu.: 1.50 1st Qu.:0.4930 1st Qu.:10.10
## Median : 3.30 Median :0.6770 Median :12.30
## Mean : 4.87 Mean :0.6276 Mean :11.99
## 3rd Qu.: 7.20 3rd Qu.:0.7790 3rd Qu.:14.30
## Max. :28.60 Max. :0.9480 Max. :20.70
## NA's :34 NA's :167 NA's :163
# Open the spreadsheet-style data viewer only in an interactive session.
# View() needs a GUI, so calling it unconditionally is useless (and may fail)
# when the script is run non-interactively, e.g. with Rscript or while knitting.
if (interactive()) {
  View(data1)
}
# This is the R chunk for the Understand Section
# Print the structure of the data frame: column names, types, first values.
str(data1)
## 'data.frame': 2938 obs. of 22 variables:
## $ Country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ Year : int 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 ...
## $ Status : chr "Developing" "Developing" "Developing" "Developing" ...
## $ Life.expectancy : num 65 59.9 59.9 59.5 59.2 58.8 58.6 58.1 57.5 57.3 ...
## $ Adult.Mortality : int 263 271 268 272 275 279 281 287 295 295 ...
## $ infant.deaths : int 62 64 66 69 71 74 77 80 82 84 ...
## $ Alcohol : num 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.03 0.02 0.03 ...
## $ percentage.expenditure : num 71.3 73.5 73.2 78.2 7.1 ...
## $ Hepatitis.B : int 65 62 64 67 68 66 63 64 63 64 ...
## $ Measles : int 1154 492 430 2787 3013 1989 2861 1599 1141 1990 ...
## $ BMI : num 19.1 18.6 18.1 17.6 17.2 16.7 16.2 15.7 15.2 14.7 ...
## $ under.five.deaths : int 83 86 89 93 97 102 106 110 113 116 ...
## $ Polio : int 6 58 62 67 68 66 63 64 63 58 ...
## $ Total.expenditure : num 8.16 8.18 8.13 8.52 7.87 9.2 9.42 8.33 6.73 7.43 ...
## $ Diphtheria : int 65 62 64 67 68 66 63 64 63 58 ...
## $ HIV.AIDS : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ GDP : num 584.3 612.7 631.7 670 63.5 ...
## $ Population : num 33736494 327582 31731688 3696958 2978599 ...
## $ thinness..1.19.years : num 17.2 17.5 17.7 17.9 18.2 18.4 18.6 18.8 19 19.2 ...
## $ thinness.5.9.years : num 17.3 17.5 17.7 18 18.2 18.4 18.7 18.9 19.1 19.3 ...
## $ Income.composition.of.resources: num 0.479 0.476 0.47 0.463 0.454 0.448 0.434 0.433 0.415 0.405 ...
## $ Schooling : num 10.1 10 9.9 9.8 9.5 9.2 8.9 8.7 8.4 8.1 ...
1. Adult_mortality has a positive relationship with education, the composition of resource income, and HIV/AIDS. (Partly wrong: Adult_mortality has a negative relationship with education, r = -0.4, and with income composition of resources, r = -0.4; its relationship with HIV/AIDS is positive, r = 0.5.)
2. Infant_deaths and Under_five_deaths have a strong positive relationship. (True, r = 1.0)
3.Schooling and alcohol have a positive relationship.(True, r =0.5)
4.Percentage expenditure has a negative relationship with education, the composition of resource income, GDP and life expectancy.(Wrong, Percentage expenditure has a positive relationship with education, r = 0.4)
5. Polio also has a strong positive relationship with diphtheria, hepatitis B, and life expectancy. (True, r = 0.7, 0.4, 0.5 respectively)
6.Diphtheria has a negative relationship with polio and life expectancy.(Wrong, Diphtheria has a positive relationship with polio, r =0.7, 0.5)
# Checking null values in the data
```r
colSums(is.na(data1)) # number of missing (NA) values in each column of the data
## Country Year
## 0 0
## Status Life.expectancy
## 0 10
## Adult.Mortality infant.deaths
## 10 0
## Alcohol percentage.expenditure
## 194 0
## Hepatitis.B Measles
## 553 0
## BMI under.five.deaths
## 34 0
## Polio Total.expenditure
## 19 226
## Diphtheria HIV.AIDS
## 19 0
## GDP Population
## 448 652
## thinness..1.19.years thinness.5.9.years
## 34 34
## Income.composition.of.resources Schooling
## 167 163
# Percentage of missing (NA) values in each column.
# colMeans() of the logical NA matrix is the per-column NA fraction. This
# avoids dividing the matrix by lengths(data1): that vector is recycled down
# the rows rather than matched to columns, so the division is only right
# because every column of a data frame has the same length.
colMeans(is.na(data1)) * 100
## Country Year
## 0.0000000 0.0000000
## Status Life.expectancy
## 0.0000000 0.3403676
## Adult.Mortality infant.deaths
## 0.3403676 0.0000000
## Alcohol percentage.expenditure
## 6.6031314 0.0000000
## Hepatitis.B Measles
## 18.8223281 0.0000000
## BMI under.five.deaths
## 1.1572498 0.0000000
## Polio Total.expenditure
## 0.6466984 7.6923077
## Diphtheria HIV.AIDS
## 0.6466984 0.0000000
## GDP Population
## 15.2484683 22.1919673
## thinness..1.19.years thinness.5.9.years
## 1.1572498 1.1572498
## Income.composition.of.resources Schooling
## 5.6841389 5.5479918
str(data1)
## 'data.frame': 2938 obs. of 22 variables:
## $ Country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ Year : int 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 ...
## $ Status : chr "Developing" "Developing" "Developing" "Developing" ...
## $ Life.expectancy : num 65 59.9 59.9 59.5 59.2 58.8 58.6 58.1 57.5 57.3 ...
## $ Adult.Mortality : int 263 271 268 272 275 279 281 287 295 295 ...
## $ infant.deaths : int 62 64 66 69 71 74 77 80 82 84 ...
## $ Alcohol : num 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.03 0.02 0.03 ...
## $ percentage.expenditure : num 71.3 73.5 73.2 78.2 7.1 ...
## $ Hepatitis.B : int 65 62 64 67 68 66 63 64 63 64 ...
## $ Measles : int 1154 492 430 2787 3013 1989 2861 1599 1141 1990 ...
## $ BMI : num 19.1 18.6 18.1 17.6 17.2 16.7 16.2 15.7 15.2 14.7 ...
## $ under.five.deaths : int 83 86 89 93 97 102 106 110 113 116 ...
## $ Polio : int 6 58 62 67 68 66 63 64 63 58 ...
## $ Total.expenditure : num 8.16 8.18 8.13 8.52 7.87 9.2 9.42 8.33 6.73 7.43 ...
## $ Diphtheria : int 65 62 64 67 68 66 63 64 63 58 ...
## $ HIV.AIDS : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ GDP : num 584.3 612.7 631.7 670 63.5 ...
## $ Population : num 33736494 327582 31731688 3696958 2978599 ...
## $ thinness..1.19.years : num 17.2 17.5 17.7 17.9 18.2 18.4 18.6 18.8 19 19.2 ...
## $ thinness.5.9.years : num 17.3 17.5 17.7 18 18.2 18.4 18.7 18.9 19.1 19.3 ...
## $ Income.composition.of.resources: num 0.479 0.476 0.47 0.463 0.454 0.448 0.434 0.433 0.415 0.405 ...
## $ Schooling : num 10.1 10 9.9 9.8 9.5 9.2 8.9 8.7 8.4 8.1 ...
# Print every column name of data1, one per line.
# Looping over names(data1) directly (rather than 1:ncol(data1)) is also
# correct for a data frame with zero columns, where 1:0 would yield c(1, 0).
for (col_name in names(data1)) {
  print(col_name)
}
## [1] "Country"
## [1] "Year"
## [1] "Status"
## [1] "Life.expectancy"
## [1] "Adult.Mortality"
## [1] "infant.deaths"
## [1] "Alcohol"
## [1] "percentage.expenditure"
## [1] "Hepatitis.B"
## [1] "Measles"
## [1] "BMI"
## [1] "under.five.deaths"
## [1] "Polio"
## [1] "Total.expenditure"
## [1] "Diphtheria"
## [1] "HIV.AIDS"
## [1] "GDP"
## [1] "Population"
## [1] "thinness..1.19.years"
## [1] "thinness.5.9.years"
## [1] "Income.composition.of.resources"
## [1] "Schooling"
# A. Impute missing values column by column.
# Each listed column has its NA entries replaced by a single summary value
# computed from the observed (non-NA) entries of that same column.

# Columns filled with the mean of their observed values.
mean_imputed <- c(
  "Life.expectancy", "Adult.Mortality", "Alcohol", "BMI",
  "Total.expenditure", "Schooling", "thinness..1.19.years",
  "thinness.5.9.years", "Income.composition.of.resources"
)
for (col_name in mean_imputed) {
  observed <- data1[[col_name]][!is.na(data1[[col_name]])]
  data1[[col_name]][is.na(data1[[col_name]])] <- mean(observed)
}

# Columns filled with the median instead: the author notes these immunization
# variables are highly skewed (their median is higher than their mean).
median_imputed <- c("Polio", "Diphtheria", "Hepatitis.B")
for (col_name in median_imputed) {
  observed <- data1[[col_name]][!is.na(data1[[col_name]])]
  data1[[col_name]][is.na(data1[[col_name]])] <- median(observed)
}

# GDP and Population are deliberately not imputed in this step.
1. Insights from the summary and the distribution of the data are the key factor in deciding which imputation method to use: all variables are mean-imputed except those whose data is highly skewed.
2. Mean imputation is done for the variables Life.expectancy, Adult.Mortality, Alcohol, BMI, Total.expenditure, Schooling, thinness..1.19.years, thinness.5.9.years and Income.composition.of.resources.
3. Median imputation is done for the variables Polio, Diphtheria and Hepatitis.B.
colSums(is.na(data1))
## Country Year
## 0 0
## Status Life.expectancy
## 0 0
## Adult.Mortality infant.deaths
## 0 0
## Alcohol percentage.expenditure
## 0 0
## Hepatitis.B Measles
## 0 0
## BMI under.five.deaths
## 0 0
## Polio Total.expenditure
## 0 0
## Diphtheria HIV.AIDS
## 0 0
## GDP Population
## 448 652
## thinness..1.19.years thinness.5.9.years
## 0 0
## Income.composition.of.resources Schooling
## 0 0
unique(data1$Status)
## [1] "Developing" "Developed"
# Mean-impute GDP and Population separately for each development Status.
#
# Every missing value is replaced by the mean of the observed values of the
# same column among rows that share its Status ("Developing" / "Developed").
#
# Why this shape:
# * Groups are matched by equality via ave(); a string comparison such as
#   Status <= "Developing" is TRUE for both status values.
# * The NA mask is computed on the full column so it lines up with the rows
#   being assigned. A mask built from a Status subset is shorter than the
#   column and gets recycled onto unrelated rows, overwriting observed values.
# * Group means are derived from the data rather than hard-coded.
# NOTE(review): the previously hard-coded fill values could not be reproduced
# from the data shown (both Population constants lie below the overall
# Population mean) - confirm per-Status observed means are what is wanted.
impute_group_mean <- function(values, groups) {
  # Mean of the observed values within each group, expanded to one per row.
  group_means <- ave(values, groups, FUN = function(v) mean(v, na.rm = TRUE))
  missing <- is.na(values)
  values[missing] <- group_means[missing]
  values
}
data1$GDP <- impute_group_mean(data1$GDP, data1$Status)
data1$Population <- impute_group_mean(data1$Population, data1$Status)
colSums(is.na(data1))
## Country Year
## 0 0
## Status Life.expectancy
## 0 0
## Adult.Mortality infant.deaths
## 0 0
## Alcohol percentage.expenditure
## 0 0
## Hepatitis.B Measles
## 0 0
## BMI under.five.deaths
## 0 0
## Polio Total.expenditure
## 0 0
## Diphtheria HIV.AIDS
## 0 0
## GDP Population
## 0 0
## thinness..1.19.years thinness.5.9.years
## 0 0
## Income.composition.of.resources Schooling
## 0 0
# Subset of data1 holding only the numerical columns used for the
# per-variable plots (Country, Year and Status are left out).
data1_con <- data1[c(
  "Adult.Mortality", "infant.deaths", "Measles", "under.five.deaths",
  "Polio", "Diphtheria", "Population", "Life.expectancy", "Alcohol",
  "percentage.expenditure", "Hepatitis.B", "BMI", "Total.expenditure",
  "HIV.AIDS", "GDP", "thinness..1.19.years", "thinness.5.9.years",
  "Income.composition.of.resources", "Schooling"
)]
We are plotting the histogram and boxplots for all numerical columns stored in the data1_con variable.
# Univariate plots: for every column of data1_con draw a histogram with a
# horizontal boxplot underneath it.
#
# par() sets graphical parameters; mfrow = c(2, 1) lays plots out in two rows
# and one column, so each histogram/boxplot pair fills one page. The previous
# settings are saved and restored afterwards so later plots are unaffected.
old_par <- par(mfrow = c(2, 1))
for (col_name in names(data1_con)) {
  values <- data1_con[[col_name]]
  # `col` is an argument of hist()/boxplot(), not of paste(): passing it to
  # paste() would append "lightblue" to the title and leave the plot uncoloured.
  hist(
    values,
    xlab = col_name,
    main = paste("histogram of", col_name),
    col = "lightblue"
  )
  boxplot(
    values,
    xlab = col_name,
    horizontal = TRUE, # draw the box along the x-axis
    main = paste("boxplot of", col_name),
    col = "lightblue"
  )
}
par(old_par)
summary(data1)
## Country Year Status Life.expectancy
## Length:2938 Min. :2000 Length:2938 Min. :36.30
## Class :character 1st Qu.:2004 Class :character 1st Qu.:63.20
## Mode :character Median :2008 Mode :character Median :72.00
## Mean :2008 Mean :69.22
## 3rd Qu.:2012 3rd Qu.:75.60
## Max. :2015 Max. :89.00
## Adult.Mortality infant.deaths Alcohol percentage.expenditure
## Min. : 1.0 Min. : 0.0 Min. : 0.010 Min. : 0.000
## 1st Qu.: 74.0 1st Qu.: 0.0 1st Qu.: 1.093 1st Qu.: 4.685
## Median :144.0 Median : 3.0 Median : 4.160 Median : 64.913
## Mean :164.8 Mean : 30.3 Mean : 4.603 Mean : 738.251
## 3rd Qu.:227.0 3rd Qu.: 22.0 3rd Qu.: 7.390 3rd Qu.: 441.534
## Max. :723.0 Max. :1800.0 Max. :17.870 Max. :19479.912
## Hepatitis.B Measles BMI under.five.deaths
## Min. : 1.00 Min. : 0.0 Min. : 1.00 Min. : 0.00
## 1st Qu.:82.00 1st Qu.: 0.0 1st Qu.:19.40 1st Qu.: 0.00
## Median :92.00 Median : 17.0 Median :43.00 Median : 4.00
## Mean :83.02 Mean : 2419.6 Mean :38.32 Mean : 42.04
## 3rd Qu.:96.00 3rd Qu.: 360.2 3rd Qu.:56.10 3rd Qu.: 28.00
## Max. :99.00 Max. :212183.0 Max. :87.30 Max. :2500.00
## Polio Total.expenditure Diphtheria HIV.AIDS
## Min. : 3.00 Min. : 0.370 Min. : 2.00 Min. : 0.100
## 1st Qu.:78.00 1st Qu.: 4.370 1st Qu.:78.00 1st Qu.: 0.100
## Median :93.00 Median : 5.938 Median :93.00 Median : 0.100
## Mean :82.62 Mean : 5.938 Mean :82.39 Mean : 1.742
## 3rd Qu.:97.00 3rd Qu.: 7.330 3rd Qu.:97.00 3rd Qu.: 0.800
## Max. :99.00 Max. :17.600 Max. :99.00 Max. :50.600
## GDP Population thinness..1.19.years
## Min. : 1.68 Min. :3.400e+01 Min. : 0.10
## 1st Qu.: 580.49 1st Qu.:6.186e+05 1st Qu.: 1.60
## Median : 3116.56 Median :8.222e+06 Median : 3.40
## Mean : 7833.52 Mean :1.191e+07 Mean : 4.84
## 3rd Qu.: 9780.86 3rd Qu.:1.062e+07 3rd Qu.: 7.10
## Max. :119172.74 Max. :1.294e+09 Max. :27.70
## thinness.5.9.years Income.composition.of.resources Schooling
## Min. : 0.10 Min. :0.0000 Min. : 0.00
## 1st Qu.: 1.60 1st Qu.:0.5042 1st Qu.:10.30
## Median : 3.40 Median :0.6620 Median :12.10
## Mean : 4.87 Mean :0.6276 Mean :11.99
## 3rd Qu.: 7.20 3rd Qu.:0.7720 3rd Qu.:14.10
## Max. :28.60 Max. :0.9480 Max. :20.70
Earlier we plotted one column at a time, which was univariate analysis. Here we compare the Life.expectancy column with several other columns using scatterplots.
# Bivariate view: one scatter plot per selected variable, with
# Life.expectancy on the x-axis and the variable on the y-axis.
# The axis labels are spelled out so they read exactly like the labels that
# plot(data1$Life.expectancy, data1$<column>) produces by default.
for (y_name in c(
  "Adult.Mortality", "BMI", "HIV.AIDS",
  "Income.composition.of.resources", "Schooling"
)) {
  plot(
    data1$Life.expectancy,
    data1[[y_name]],
    xlab = "data1$Life.expectancy",
    ylab = paste0("data1$", y_name)
  )
}
Earlier we drew the boxplot of one column at a time, which was univariate analysis. Here we compare the distribution of each column across the levels of the column “Status”.
# Boxplots of selected variables split by Status, one plot per variable.
# Life.expectancy was previously plotted twice in a row; it is drawn once here.
# Axis labels name the columns instead of showing the raw data1$... expression.
for (col_name in c(
  "Population", "GDP", "Life.expectancy", "infant.deaths", "Hepatitis.B"
)) {
  boxplot(
    data1[[col_name]] ~ data1$Status,
    xlab = "Status",
    ylab = col_name
  )
}
# Correlation matrix
```r
# Pairwise correlations between columns 4 to 22 of data1 (everything after
# Country, Year and Status), rounded to one decimal place.
round(cor(data1[, 4:22]), digits = 1)
## Life.expectancy Adult.Mortality infant.deaths
## Life.expectancy 1.0 -0.7 -0.2
## Adult.Mortality -0.7 1.0 0.1
## infant.deaths -0.2 0.1 1.0
## Alcohol 0.4 -0.2 -0.1
## percentage.expenditure 0.4 -0.2 -0.1
## Hepatitis.B 0.2 -0.1 -0.2
## Measles -0.2 0.0 0.5
## BMI 0.6 -0.4 -0.2
## under.five.deaths -0.2 0.1 1.0
## Polio 0.5 -0.3 -0.2
## Total.expenditure 0.2 -0.1 -0.1
## Diphtheria 0.5 -0.3 -0.2
## HIV.AIDS -0.6 0.5 0.0
## GDP 0.4 -0.3 -0.1
## Population 0.0 0.0 0.5
## thinness..1.19.years -0.5 0.3 0.5
## thinness.5.9.years -0.5 0.3 0.5
## Income.composition.of.resources 0.7 -0.4 -0.1
## Schooling 0.7 -0.4 -0.2
## Alcohol percentage.expenditure Hepatitis.B
## Life.expectancy 0.4 0.4 0.2
## Adult.Mortality -0.2 -0.2 -0.1
## infant.deaths -0.1 -0.1 -0.2
## Alcohol 1.0 0.3 0.1
## percentage.expenditure 0.3 1.0 0.0
## Hepatitis.B 0.1 0.0 1.0
## Measles -0.1 -0.1 -0.1
## BMI 0.3 0.2 0.1
## under.five.deaths -0.1 -0.1 -0.2
## Polio 0.2 0.1 0.4
## Total.expenditure 0.3 0.2 0.1
## Diphtheria 0.2 0.1 0.4
## HIV.AIDS 0.0 -0.1 -0.1
## GDP 0.3 0.9 0.1
## Population 0.0 0.0 -0.1
## thinness..1.19.years -0.4 -0.3 -0.1
## thinness.5.9.years -0.4 -0.3 -0.1
## Income.composition.of.resources 0.4 0.4 0.1
## Schooling 0.5 0.4 0.1
## Measles BMI under.five.deaths Polio
## Life.expectancy -0.2 0.6 -0.2 0.5
## Adult.Mortality 0.0 -0.4 0.1 -0.3
## infant.deaths 0.5 -0.2 1.0 -0.2
## Alcohol -0.1 0.3 -0.1 0.2
## percentage.expenditure -0.1 0.2 -0.1 0.1
## Hepatitis.B -0.1 0.1 -0.2 0.4
## Measles 1.0 -0.2 0.5 -0.1
## BMI -0.2 1.0 -0.2 0.3
## under.five.deaths 0.5 -0.2 1.0 -0.2
## Polio -0.1 0.3 -0.2 1.0
## Total.expenditure -0.1 0.2 -0.1 0.1
## Diphtheria -0.1 0.3 -0.2 0.7
## HIV.AIDS 0.0 -0.2 0.0 -0.2
## GDP -0.1 0.3 -0.1 0.2
## Population 0.2 -0.1 0.5 0.0
## thinness..1.19.years 0.2 -0.5 0.5 -0.2
## thinness.5.9.years 0.2 -0.5 0.5 -0.2
## Income.composition.of.resources -0.1 0.5 -0.2 0.3
## Schooling -0.1 0.5 -0.2 0.4
## Total.expenditure Diphtheria HIV.AIDS GDP
## Life.expectancy 0.2 0.5 -0.6 0.4
## Adult.Mortality -0.1 -0.3 0.5 -0.3
## infant.deaths -0.1 -0.2 0.0 -0.1
## Alcohol 0.3 0.2 0.0 0.3
## percentage.expenditure 0.2 0.1 -0.1 0.9
## Hepatitis.B 0.1 0.4 -0.1 0.1
## Measles -0.1 -0.1 0.0 -0.1
## BMI 0.2 0.3 -0.2 0.3
## under.five.deaths -0.1 -0.2 0.0 -0.1
## Polio 0.1 0.7 -0.2 0.2
## Total.expenditure 1.0 0.1 0.0 0.1
## Diphtheria 0.1 1.0 -0.2 0.2
## HIV.AIDS 0.0 -0.2 1.0 -0.1
## GDP 0.1 0.2 -0.1 1.0
## Population -0.1 0.0 0.0 0.0
## thinness..1.19.years -0.3 -0.2 0.2 -0.3
## thinness.5.9.years -0.3 -0.2 0.2 -0.3
## Income.composition.of.resources 0.1 0.4 -0.2 0.4
## Schooling 0.2 0.4 -0.2 0.4
## Population thinness..1.19.years
## Life.expectancy 0.0 -0.5
## Adult.Mortality 0.0 0.3
## infant.deaths 0.5 0.5
## Alcohol 0.0 -0.4
## percentage.expenditure 0.0 -0.3
## Hepatitis.B -0.1 -0.1
## Measles 0.2 0.2
## BMI -0.1 -0.5
## under.five.deaths 0.5 0.5
## Polio 0.0 -0.2
## Total.expenditure -0.1 -0.3
## Diphtheria 0.0 -0.2
## HIV.AIDS 0.0 0.2
## GDP 0.0 -0.3
## Population 1.0 0.2
## thinness..1.19.years 0.2 1.0
## thinness.5.9.years 0.2 0.9
## Income.composition.of.resources 0.0 -0.4
## Schooling 0.0 -0.4
## thinness.5.9.years
## Life.expectancy -0.5
## Adult.Mortality 0.3
## infant.deaths 0.5
## Alcohol -0.4
## percentage.expenditure -0.3
## Hepatitis.B -0.1
## Measles 0.2
## BMI -0.5
## under.five.deaths 0.5
## Polio -0.2
## Total.expenditure -0.3
## Diphtheria -0.2
## HIV.AIDS 0.2
## GDP -0.3
## Population 0.2
## thinness..1.19.years 0.9
## thinness.5.9.years 1.0
## Income.composition.of.resources -0.4
## Schooling -0.4
## Income.composition.of.resources Schooling
## Life.expectancy 0.7 0.7
## Adult.Mortality -0.4 -0.4
## infant.deaths -0.1 -0.2
## Alcohol 0.4 0.5
## percentage.expenditure 0.4 0.4
## Hepatitis.B 0.1 0.1
## Measles -0.1 -0.1
## BMI 0.5 0.5
## under.five.deaths -0.2 -0.2
## Polio 0.3 0.4
## Total.expenditure 0.1 0.2
## Diphtheria 0.4 0.4
## HIV.AIDS -0.2 -0.2
## GDP 0.4 0.4
## Population 0.0 0.0
## thinness..1.19.years -0.4 -0.4
## thinness.5.9.years -0.4 -0.4
## Income.composition.of.resources 1.0 0.8
## Schooling 0.8 1.0
# Save the imputed, post-EDA dataset as a CSV file (row names are not written).
write.csv(
  x = data1,
  file = "/Users/preetvilu/Downloads/after_eda_data.csv",
  row.names = FALSE
)